# geometry of the puma 2022 shapefiles
puma <- read_sf(here("data", "tl_2022_06_puma20", "tl_2022_06_puma20.shp"))
puma_la <- puma %>%
filter(grepl(037, PUMACE20)) %>%
rename(PUMA = PUMACE20)
pums_housing_22 <- read_csv(here::here("data", "pums_2022.csv"))
# https://www.huduser.gov/portal/datasets/il/il2020/2020summary.odn
people <- c(1, 2, 3, 4, 5, 6, 7, 8)
very_low <- c(39450, 45050, 50700, 56300, 60850, 65350, 69850, 74350)
extreme_low <- c(23700, 27050, 30450, 33800, 36550, 39250, 41950, 44650)
low <- c(63100, 72100, 81100, 90100, 97350, 104550, 111750, 118950)
income_breaks <- data.frame(people, extreme_low, very_low, low)
# remove the duplicates from 2022 data
puma_house_codes <- pums_housing_22 %>%
select(SERIALNO, PUMA) %>%
distinct()
puma_22 <- pums_housing_22 %>%
inner_join(puma_la, by = "PUMA") %>%
select(SERIALNO, WGTP, NP, HINCP, FINCP, GRPIP) %>%
mutate(NP = as.numeric(NP),
HINCP = as.numeric(HINCP),
FINCP = as.numeric(FINCP),
GRPIP = as.numeric(GRPIP)) %>%
group_by(SERIALNO) %>%
summarize_all(mean, na.rm = TRUE) %>%
left_join(puma_house_codes)
la_tract_vacant <- get_acs(
state = "CA",
county = "Los Angeles",
geography = "tract",
variables = "B25004_001",
geometry = TRUE,
year = 2022
)
la_tract <- la_tract_vacant %>%
mutate(NAME = gsub("; Los Angeles County; California", # elements that you want to remove
"", # replace with blank
NAME)) %>%
mutate(NAME = gsub("Census Tract ", # elements that you want to remove
"", # replace with blank
NAME)) %>%
filter(GEOID != "06037599100") %>% # islands
filter(GEOID != "06037599000") %>% # islands
filter(GEOID != "06037980003") %>%
filter(GEOID != "06037980004") %>%
filter(!(NAME >= 9000 & NAME <= 9800))Final Infographic
Data
Wrangle and Clean Data
https://www.census.gov/cgi-bin/geo/shapefiles/index.php
Looking at the places where affordable housing projects started
# read in affordable housing projects in LA from 2003 to present
ah_raw <- read_csv(here("data/LAHD_Affordable_Housing_Projects_List__2003_to_Present__20240119.csv")) %>%
clean_names()
# clean affordable housing data
ah_clean <- ah_raw %>%
mutate(fun_date = as.Date(date_funded, tryFormats = c("%m/%d/%Y"))) %>% # make date_funded as date
mutate(year = lubridate::year(fun_date)) %>% # make into year column
select(name, year, fun_date, construction_type, site_community, total_units = project_total_units,
housing_type, lahd_funded, in_service_date, gps_coords_on_map) %>%
mutate(gps_coords_on_map = gsub("[POINT()]", # elements that you want to remove
"", # replace with blank
gps_coords_on_map)) %>% # remove these elements from gps column
separate_wider_delim(gps_coords_on_map,
delim = " ", names = c("empty", "coords"), # separate space from before the coordinates
too_many = "merge") %>%
separate_wider_delim(coords, delim = " ", names = c("Longitude", "Latitude"), # split lat and long coords
too_many = "merge") %>%
select(-empty) %>%
st_as_sf(coords = c("Longitude", "Latitude"), # make into geometry object
crs = st_crs(la_tract)) %>%
st_join(la_tract) %>%
mutate(site_community = str_to_title(site_community))Assigning income categories to households
Create income breaks for households above 8 (from 9-20)
# very low income
size <- seq(9, 20, 1)
very_low <- data.frame()
row_n <- data.frame()
for (i in seq_along(size)) {
income <- 1.32 + (i*.08)
limit <- income*56300
row <- size[i]
row_n <- rbind(row_n, row)
very_low = rbind(very_low, limit)
}
very_low <- cbind(row_n, very_low) %>%
rename(people = X9,
very_low = X78820)
extreme <- data.frame()
for (i in seq_along(size)) {
# https://www.huduser.gov/portal/datasets/il/il2020/2020ILCalc3080.odn#calculator
income <- 44120 + (i*4480)
extreme <- rbind(extreme, income)
}
low <- data.frame()
for (i in seq_along(size)) {
income <- 1.32 + (i*.08)
limit <- income*90100
low <- rbind(low, limit)
}
cols_9_20 <- cbind(extreme, very_low) %>%
rename(extreme_low = X48600) %>%
cbind(low) %>%
rename(low = X126140)
income_breaks <- rbind(income_breaks, cols_9_20)Creating For Loop
Applying for loop to the whole data set
puma_22$status <- rep(NA, nrow(puma_22))
for (person in 1:nrow(puma_22)) {
individual <- puma_22$NP[person]
income <- income_breaks %>%
filter(people == individual)
#print(individual)
#print(individual)
# Define thresholds for labels
extreme_threshold <- income[[2]]
very_low_threshold <- income[[3]]
low_threshold <- income[[4]]
# # Assign labels based on thresholds
puma_22$status[person] <- ifelse(puma_22[["HINCP"]][person] <= extreme_threshold,
"extreme",
ifelse(puma_22[["HINCP"]][person] > extreme_threshold & puma_22[["HINCP"]][person] <= very_low_threshold,
"very low",
ifelse(puma_22[["HINCP"]][person] > very_low_threshold & puma_22[["HINCP"]][person] <= low_threshold,
"low",
ifelse(puma_22[["HINCP"]][person] > low_threshold, "not eligible", "non"))))
}Mapping census tracts in LA
puma_geo <- puma_la %>%
select(PUMA, geometry)
puma_plot <- puma_22 %>%
group_by(PUMA) %>%
count(status) %>%
pivot_wider(names_from = "status", values_from = "n") %>%
ungroup() %>%
clean_names() %>%
select(puma, extreme, very_low, low, not_eligible) %>%
mutate(PUMA = puma,
total_households = rowSums(.[2:5]),
total_eligible = rowSums(.[2:4]),
percent_eligible = (total_eligible/total_households) * 100 ) %>%
left_join(puma_geo) %>%
st_as_sf()
# crop puma plot with the la_tract to remove the top part of LA
puma_plot_crop <- st_crop(puma_plot, la_tract)
# has 2196 points
tract_neighborhood <- read_csv(here("data/Census_Tract_Locations__LA__20240206.csv")) %>%
mutate(census_tract = gsub(", Los Angeles County, California", # elements that you want to remove
"", # replace with blank
Tract)) %>%
select(-Tract, census_tract, Neighborhood, Latitude, Longitude) %>%
st_as_sf(coords = c("Longitude", "Latitude"), # make into geometry object
crs = st_crs(puma_plot)) %>%
st_crop(puma_plot_crop) # neighborhoods only in the census tracts of interest
# make a df with the puma information and census tract geometry
puma_tract <- puma_plot_crop %>% # has 67 observations
st_join(tract_neighborhood,
join = st_intersects) %>% # has the same number of points as the neighborhood
clean_names() %>%
select(neighborhood, census_tract, location, puma, geometry) %>%
mutate(census_tract = gsub("Census Tract ", # elements that you want to remove
"", # replace with blank
census_tract))
# rename the tract column
la_tract <- la_tract %>%
rename(census_tract = NAME) %>%
select(census_tract, geometry)
puma_tract <- puma_tract %>%
st_drop_geometry() %>%
left_join(la_tract)
puma_eligibility <- puma_plot %>%
st_drop_geometry() %>%
select(!PUMA)
# make map of the census tracts with the puma household data
tract_eligibility <- puma_tract %>%
left_join(puma_eligibility, by = "puma") %>%
filter(!st_is_empty(geometry)) %>%
st_as_sf()Looking at census tract and the percent of income spent on rent: Rent Burdened
rent_burden <- puma_22 %>%
group_by(PUMA) %>%
rename(percent_rent = GRPIP) %>%
select(WGTP, percent_rent, PUMA) %>%
mutate(status = ifelse(percent_rent >= 30, "burden", "not_burden")) %>%
count(status) %>%
pivot_wider(names_from = "status", values_from = "n") %>%
ungroup() %>%
rename(puma = PUMA) %>%
mutate(total = burden+not_burden,
percent_burden = (burden/total)*100)
#plot(rent_burden_2$geometry)
rent_burdened <- puma_tract %>%
left_join(rent_burden) %>%
filter(!st_is_empty(geometry)) %>%
st_as_sf()Creating a custom theme
library(monochromeR)
library(showtext)
library(ggtext)
library(ggrepel)
# import google fonts
font_add_google(name = "Josefin Sans",
family = "josefin") # name we provide ggplot
font_add_google(name = "Sen",
family = "sen") # name we provide ggplot
font_add_google(name = "Tenor Sans",
family = "tenorSans") # name we provide ggplot
# Pairs
font_add_google(name = "Playfair Display",
family = "playfairDisplay") # name we provide ggplot
font_add_google(name = "Alice",
family = "alice") # name we provide ggplot
# enable show text here that configures font across platforms
showtext_auto()
my_theme <- theme(
plot.title = element_text(size = 20, face = "bold", family = "playfairDisplay", hjust = 0.5), # Font size set to 16 and bold.
plot.caption.position = "plot",
plot.caption = element_text(size = 10, hjust = 0, family = "alice"),
axis.title = element_text(size = 14), # Font size set to 12.
axis.text = element_blank(), # no axis text.
axis.ticks = element_blank(),
legend.title = element_text(size = 15, face = "bold", family = "alice"), # Font size of the title of the legend set to 12 and bold.
legend.text = element_text(size = 10), # font size of the text in the legend
panel.background = element_rect(fill = "white"), # color of the panel background
panel.grid.major = element_blank(),
plot.margin = margin(12, 6, 12, 18),
panel.grid.minor = element_blank(), # invisible auxiliary grids
plot.background = element_rect(fill = "white"), # plot's background
strip.background = element_rect(fill = "white", color = "#ff975d"), # strip background
strip.text = element_text(size = 12, face = "bold", family = "alice"), # strip texts
legend.position = "top", # position of the legend
legend.box.background = element_rect(color = "white"), # background of the plot
legend.key.size = unit(1, "cm") # size of the legends key
)Figures
my_brew_palette15 <- RColorBrewer::brewer.pal(n = 20, name = 'YlGnBu')
my_brew_palette10 <- RColorBrewer::brewer.pal(n = 10, name = 'RdBu')
my_brew_palette5 <- RColorBrewer::brewer.pal(n = 10, name = 'Reds')
# eligibility of housing in LA
tract_eligibility_percent <- ggplot(tract_eligibility) +
geom_sf(aes(fill = percent_eligible)) +
scale_fill_stepsn(colors = my_brew_palette5,
labels = scales::label_percent(scale = 1),
breaks = scales::breaks_width(width = 7)) +
labs(title = "Exploring Affordable Housing Needs in Los Angeles County, CA",
fill = "Percent Eligible",
caption = "Households Eligible for Affordable Housing by PUMA in Los Angeles, CA") +
guides(fill = guide_colorbar(barheight = unit(1, units = "mm"),
barwidth = unit(100, units = "mm"),
direction = "horizontal",
ticks.colour = "grey20",
title.position = "top",
label.position = "bottom",
title.hjust = 0.5)) +
my_theme
rent_burden_plot <- ggplot(rent_burdened) +
geom_blank() +
geom_sf(aes(fill = percent_burden)) +
scale_fill_stepsn(colors = my_brew_palette5,
breaks = scales::breaks_width(width = 10),
labels = scales::label_percent(scale = 1)) +
labs(title = "Exploring Affordable Housing Needs in Los Angeles County, CA",
fill = "Percent Burdened",
caption = "Percent of Households Rent Burdened by Census Tract in Los Angeles, CA (2022). Dots represent location of affordable housing projects from 2003-2020.") +
guides(fill = guide_colorbar(barheight = unit(1, units = "mm"),
barwidth = unit(100, units = "mm"),
direction = "horizontal",
ticks.colour = "grey20",
title.position = "top",
label.position = "bottom",
title.hjust = 0.5)) +
my_theme
burden_affordable_projects <- rent_burden_plot +
geom_sf(data = ah_clean, alpha = 0.5, size = 1)
tract_eligibility_percentburden_affordable_projectsSelecting the Census Tracts of interest
# joining the eligibility and rent data together by census tract
burden_eligible <- rent_burdened %>%
st_drop_geometry() %>%
left_join(tract_eligibility, by = "census_tract") %>%
st_as_sf()
top_burden <- burden_eligible %>%
slice_max(order_by = percent_burden,
n = 75)
# high burden, no affordable housing
burden_non <- top_burden %>%
slice_min(order_by = percent_burden,
n = 25)
# high burden, affordable housing
burden_ah <- burden_eligible %>%
slice_max(order_by = percent_burden,
n = 30)
plot(burden_ah$geometry)# low burden, with affordable housing - and also have high percentage eligible for affordable housing
lowburden_ah <- burden_eligible %>%
slice_max(order_by = percent_eligible,
n = 450) %>%
slice_min(order_by = percent_eligible,
n = 25)
plot(lowburden_ah$geometry)# low burden, no affordable housing - low percentage of households that qualify for affordable housing
lowburden_non <- burden_eligible %>%
slice_min(order_by = percent_eligible,
n = 100) %>%
slice_min(order_by = percent_eligible,
n = 20)
plot(lowburden_non$geometry)# neighborhoods right next to undeserved geometry represented by `burden_non`
interest <- burden_eligible %>%
slice_min(order_by = percent_burden,
n = 600) %>%
slice_max(order_by = percent_burden,
n = 15)
plot(interest$geometry)# %>%
# st_union()Plotting the three comparison neighborhoods
# high burden, no affordable housing
burden_non_plot <- burden_non %>%
st_union() %>%
ggplot() +
geom_sf() +
labs(title = "Neighborhoods with High Rent Burden, \nNo Affordable Housing") +
my_theme
# high burden, affordable housing
burden_ah_plot <- burden_ah %>%
st_union() %>%
ggplot() +
geom_sf() +
labs(title = "Neighborhoods with High Rent Burden, \nWith Affordale Housing") +
my_theme
# low burden, with affordable housing - and also have high percentage eligible for affordable housing
lowburden_ah_plot <- lowburden_ah %>%
st_union() %>%
ggplot() +
geom_sf() +
labs(title = "Neighborhoods with Low Rent Burden, \nWith Affordale Housing",
subtitle = "and also have high percentage eligible for affordable housing") +
my_theme
# ow percentage of households that qualify for affordable housing
lowburden_non_plot <- lowburden_non %>%
st_union() %>%
ggplot() +
geom_sf() +
labs(title = "Neighborhoods with Low Rent Burden, \nNo Affordale Housing",
subtitle = "low percentage of households that qualify for affordable housing") +
my_theme
interest_plot <- interest %>%
st_union() %>%
ggplot() +
geom_sf() +
labs(title = "Neighborhood with low rent burden right next \nto neighborhood with high rent burden and no support",
subtitle = "50% percentage of households that qualify for affordable housing, less than 20% rent burden") +
my_theme
burden_non_plot / burden_ah_plot / lowburden_ah_plot / lowburden_non_plot /
interest_plotbase_map <- ggplot(rent_burdened) +
geom_blank() +
geom_sf(aes(fill = percent_burden), color = "black", lwd = 0.05) +
scale_fill_stepsn(colors = my_brew_palette5,
breaks = scales::breaks_width(width = 10),
labels = scales::label_percent(scale = 1)) +
labs(title = "Exploring Affordable Housing Needs in Los Angeles County, CA",
fill = "Percent Burdened") +
guides(fill = guide_colorbar(barheight = unit(1, units = "mm"),
barwidth = unit(100, units = "mm"),
direction = "horizontal",
ticks.colour = "grey20",
title.position = "top",
label.position = "bottom",
title.hjust = 0.5)) +
my_theme +
geom_sf(data = ah_clean, alpha = 0.5, size = 1)
# burden_non points
burden_non_pt <- data.frame(
y = c(33.9884335),
x = c(-118.1758795),
yend = c(33.77222),
xend = c(-117.92694)
)
# burden_ah points
burden_ah_pt <- data.frame(
y = c(34.0597395),
x = c(-118.2982),
yend = c(33.93291),
xend = c(-118.66136)
# yend = c(33.62815),
# xend = c(- 118.30718)
)
lowburden_ah_pt <- data.frame(
y = c(34.245882),
x = c(-118.4209225),
yend = c(34.31389),
xend = c(-118.12637)
)
lowburden_non_pt <- data.frame(
y = c(33.860177),
x = c(-118.388831),
yend = c(33.82235),
xend = c(-118.53424)
)
# add the lines to the plot in segments - still editing this
base_map +
geom_curve(data = burden_non_pt,
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(length = unit(0.01, "npc")),
#linetype = "dashed",
curvature = -0.5,
color = "grey") +
geom_curve(data = burden_ah_pt,
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(length = unit(0.01, "npc")),
curvature = 0.5,
color = "grey") +
geom_curve(data = lowburden_ah_pt,
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(length = unit(0.01, "npc")),
curvature = -0.5,
color = "grey") # geom_segment(data = lowburden_non_pt,
# aes(x = x, y = y, xend = xend, yend = yend),
# linetype = "dashed",
# color = "grey")RColorBrewer::brewer.pal(n=10,"Reds")[1] "#FFF5F0" "#FEE0D2" "#FCBBA1" "#FC9272" "#FB6A4A" "#EF3B2C" "#CB181D"
[8] "#A50F15" "#67000D"
order_2015 <- data.frame(
y = c(200000000),
x = c(2010),
yend = c(50000000),
xend = c(2015)
)
# looking at the total construction of affordable housing in each year
lahd_funding <- ah_clean %>%
drop_na() %>%
group_by(year) %>%
summarize(total_funding = sum(lahd_funded)) %>%
ggplot(aes(x = year, y = total_funding)) +
geom_col(fill = "#67000D") +
scale_y_continuous(expand=c(0,0),
labels = function(x) paste0(x / 1e6, "M")) +
geom_curve(data = order_2015,
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(length = unit(0.01, "npc")),
size = 0.1,
curvature = -0.5,
color = "black") +
annotate(geom = "text", x = 2009, y = 200000000,
label = "2015 Directive Order \nfrom Mayor XYZ to facilitate the building \nof affordable housing.") +
theme_classic() +
labs(x = 'Year',
y = 'Total Funding (in millions)')
lahd_fundingNext steps
- get information for these areas
- Number of households (approximate)
- How has that changed since 2003
Look at the industry / jobs of people in the census tract
This data does not exist
# load list of variables that I could access
variables <- load_variables(2022, "acs1", cache = TRUE)
#B24114 is for Detailed Occupation for the Civilian Employed Population 16 Years and Over
#B24115 is for Detailed Occupation for the Civilian Employed Population 16 Years and Over - Male
#B24116 is for Detailed Occupation for the Civilian Employed Population 16 Years and Over - Female
B24114 <- variables %>%
filter(str_detect(name, "B24114"))
#B24114$name
la_tract_occupation <- get_acs(
state = "CA",
county = "Los Angeles",
geography = "state",
variables = "B24114_100",
geometry = TRUE,
year = 2021
)
|
| | 0%
|
|= | 2%
|
|===== | 7%
|
|====== | 8%
|
|======= | 10%
|
|========= | 13%
|
|============ | 18%
|
|============= | 19%
|
|================== | 26%
|
|======================= | 33%
|
|============================= | 41%
|
|================================= | 47%
|
|======================================== | 57%
|
|============================================= | 65%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|======================================================== | 81%
|
|============================================================== | 88%
|
|=============================================================== | 89%
|
|=================================================================== | 96%
|
|======================================================================| 100%
v <- data(acs5_geography)Try this that looks at the number of workers in household based on size
# B08202 Household Size by Number of Workers in Household
B08202 <- variables %>%
filter(str_detect(name, "B08202"))
la_tract_workers <- get_acs(
state = "CA",
county = "Los Angeles",
geography = "state",
variables = B08202$name,
geometry = TRUE,
year = 2022
)
|
| | 0%
|
|=== | 4%
|
|==== | 6%
|
|======= | 10%
|
|============= | 18%
|
|================== | 26%
|
|======================= | 33%
|
|======================== | 34%
|
|========================= | 35%
|
|=========================== | 38%
|
|============================== | 42%
|
|================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|===================================== | 53%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|===================================================================== | 98%
|
|======================================================================| 99%
|
|======================================================================| 100%
Look at the average age / demographic data by tract
https://walker-data.com/tidycensus/articles/basic-usage.html
ah_tracts <- unique(ah_clean$NAME)# B01001A is Sex by Age (White Alone)
# B01001B is Sex by Age (Black or African American Alone)
# B01001
B01001 <- variables %>%
filter(str_detect(name, "B01001"))
# has the de
la_tract_age <- get_acs(
state = "CA",
county = "Los Angeles",
geography = "tract",
variables = B01001$name,
geometry = TRUE,
year = 2022
)
tract_ethnicity <- la_tract_age %>%
mutate(NAME = gsub("; Los Angeles County; California", # elements that you want to remove
"", # replace with blank
NAME)) %>%
mutate(NAME = gsub("Census Tract ", # elements that you want to remove
"", # replace with blank
NAME)) %>%
rename(census_tract = NAME)
#unique(tract_ethnicity$census_tract)
# - make a list of the census tracts of interest
on_plot <- lowburden_ah %>%
mutate(area = "lowburden_ah") %>%
rbind(burden_ah %>% mutate(area = "burden_ah")) %>%
rbind(burden_non %>% mutate(area = "burden_non")) %>%
rbind(lowburden_non %>% mutate(area = "lowburden_non")) %>%
select(census_tract, area)
# - filter the tracts in the age / race df
sex_age_ethnicity <- tract_ethnicity %>%
# - replace the variables with more intuitive information
mutate(variable = gsub("B01001A",
"white",
variable),
variable = gsub("B01001B",
"black",
variable),
variable = gsub("B01001C",
"ai", # american indian and alaska native
variable),
variable = gsub("B01001D",
"asian",
variable),
variable = gsub("B01001E",
"haw", # hawaiian and pacific islander
variable),
variable = gsub("B01001F",
"other",
variable),
variable = gsub("B01001G",
"two", # two or more
variable),
variable = gsub("B01001H",
"wn", # white non hispanic and latino
variable),
variable = gsub("B01001I",
"hisp", # hispanic and latino
variable),
variable = gsub("B01001_",
"total_",
variable)) %>%
separate_wider_delim(cols = 3,
delim = "_",
names = c("group", "age_variable"))
#plot(ethnicity$geometry)
#unique(all_eth$census_tract)
#unique(ah_tracts_eth$census_tract)
ah_tracts_eth <- sex_age_ethnicity %>%
filter(census_tract %in% ah_tracts) %>%
filter(age_variable == "001") %>%
select(-c(GEOID, age_variable, moe, geometry)) %>%
mutate(status = "affordable")
# pivot_wider(names_from = group,
# values_from = estimate)
ah_tot <- ah_tracts_eth %>%
filter(group == "total") %>%
select(census_tract, group, estimate) %>%
rename(totals = group,
total_est = estimate)
ah_tracts_eth <- ah_tracts_eth %>%
filter(group != "total") %>%
left_join(ah_tot, by = "census_tract") %>%
mutate(percent = round((estimate/total_est)*100, 1)) %>%
select(census_tract, group, estimate, percent, status)
all_eth <- sex_age_ethnicity %>%
filter(census_tract %in% rent_burdened$census_tract) %>%
filter(age_variable == "001") %>%
select(-c(GEOID, age_variable, moe, geometry)) %>%
mutate(status = "non")
all_tot <- all_eth %>%
filter(group == "total") %>%
select(census_tract, group, estimate) %>%
rename(totals = group,
total_est = estimate)
all_eth <- all_eth %>%
filter(group != "total") %>%
left_join(all_tot, by = "census_tract") %>%
mutate(percent = round((estimate/total_est)*100, 1)) %>%
select(census_tract, group, estimate, percent, status)
# combine the tracts
compare <- all_eth %>%
rbind(ah_tracts_eth) %>%
rename(groups = group) %>%
filter(!groups %in% c("ai", "haw", "two"))showtext_auto()
# Custom labels for x-axis
custom_labels <- c("White (non Hispanic or Latino)", "White", "Other", "Hispanic", "Black", "Asian")
compare_plot <- ggplot(compare) +
geom_boxplot(aes(x = groups, y = percent, fill = status),
color = "white") +
scale_fill_manual(values = c("#67000D", "#c19089")) +
coord_flip()
compare_plot +
labs(y = "Percent of Census Tract",
x = "",
fill = "",
subtitle = "The demographics of the census tracts in all of the <span style = 'color: #c19089; font-size: 20pt;'>**Los Angeles Metropolitan Area**</span> vs tracts that <span style = 'color: #67000D; font-size: 20pt;'>**have built affordable housing**</span> since 2003.") +
scale_x_discrete(labels = rev(custom_labels)) +
theme_minimal()# subtitle = "The demographics of the census tracts in all of the <span style = 'color: #c19089; font-size: 20pt;'>**Los Angeles Metropolitan Area**</span> vs tracts that <span style = 'color: #67000D; font-size: 20pt;'>**have built affordable housing**</span> since 2003.")# for clarity: the estimate:total is the sum of the total male and the total female: could be used for total number of that ethnicity
ethnicity <- sex_age_ethnicity %>%
filter(census_tract %in% on_plot$census_tract) %>%
filter(age_variable == "001") %>%
select(-c(GEOID, age_variable, moe))
sum_on_plot <- on_plot %>%
left_join(ethnicity, by = "census_tract") %>%
select(-census_tract) %>%
st_drop_geometry() %>%
group_by(area, group) %>%
summarize(estimate = sum(estimate))
ggplot() +
geom_col(data = sum_on_plot, aes(x = area, y = estimate, fill = group),
position = "dodge")Making shapefiles for the census tracts
# high burden, no affordable housing
# burden_non_shp
burden_non %>%
st_drop_geometry() %>%
head(1) %>%
select(extreme, very_low, low, not_eligible, total_households, # sum of the other columns
percent_eligible
) %>%
mutate(p_ex = round((extreme/total_households)*100, 0),
p_vl = round((very_low/total_households)*100, 0),
p_l = round((low/total_households)*100, 0),
p_non = round((not_eligible/total_households)*100, 0)) %>%
select(p_ex, p_vl, p_l, p_non) %>%
pivot_longer(cols = 1:4,
names_to = "status",
values_to = "percent") %>%
mutate(name = "low_burden") %>%
ggplot() +
geom_bar(aes(x = 1, y = percent, fill = status),
stat = "identity", position = "stack") +
scale_fill_viridis(discrete = TRUE) +
theme_minimal() +
labs(x = NULL, y = "Percentage", title = "Not Burdened")
# high burden, affordable housing
#buden_ah_shp <-
burden_ah %>%
st_drop_geometry() %>%
head(1) %>%
select(extreme, very_low, low, not_eligible, total_households, # sum of the other columns
percent_eligible
) %>%
mutate(p_ex = round((extreme/total_households)*100, 0),
p_vl = round((very_low/total_households)*100, 0),
p_l = round((low/total_households)*100, 0),
p_non = round((not_eligible/total_households)*100, 0)) %>%
select(p_ex, p_vl, p_l, p_non) %>%
pivot_longer(cols = 1:4,
names_to = "status",
values_to = "percent") %>%
mutate(name = "low_burden") %>%
ggplot() +
geom_bar(aes(x = 1, y = percent, fill = status),
stat = "identity", position = "stack") +
scale_fill_viridis(discrete = TRUE) +
theme_minimal() +
labs(x = NULL, y = "Percentage", title = "Burden with AH")
# low burden, with affordable housing - and also have high percentage eligible for affordable housing
#low_burden_shp <-
library(viridis)
lowburden_ah %>%
st_drop_geometry() %>%
head(1) %>%
select(extreme, very_low, low, not_eligible, total_households, # sum of the other columns
percent_eligible
) %>%
mutate(p_ex = round((extreme/total_households)*100, 0),
p_vl = round((very_low/total_households)*100, 0),
p_l = round((low/total_households)*100, 0),
p_non = round((not_eligible/total_households)*100, 0)) %>%
select(p_ex, p_vl, p_l, p_non) %>%
pivot_longer(cols = 1:4,
names_to = "status",
values_to = "percent") %>%
mutate(name = "low_burden",
ypos = c(50, 95, 75, 17), # location of labels on the plot
status = as.factor(status)) %>%
ggplot() +
geom_bar(aes(x = 1, y = percent, fill = reorder(status, percent)),
stat = "identity", position = "stack") +
# Add text
geom_text(
aes(1, y = ypos, label = reorder(status, percent)),
hjust = 0,
nudge_x = 0,
colour = "white",
size = 7,
family = "alice"
) +
scale_fill_viridis(discrete = TRUE) +
labs(x = NULL, y = "Percentage", title = "Low Burden with AH") +
plot_theme# Load necessary libraries
library(ggplot2)
library(XML)
library(svglite)
# Example data
data <- data.frame(
category = c("A", "B", "C"),
value1 = c(10, 20, 30),
value2 = c(15, 25, 35)
)
# Create SVG plot
svg("file:///Users/lunacatalan/Downloads/8z5bfP01.svg", width = 500, height = 500)
# Read SVG image outline
svg_image <- xmlParse("file:///Users/lunacatalan/Downloads/8z5bfP01.svg")
# Extract the image dimensions
svg_dims <- xmlAttrs(xmlRoot(svg_image))
# Create the data visualization on top of the SVG image
plot_gg<- ggplot(data, aes(x = category)) +
geom_bar(aes(y = value1, fill = "Value1"), stat = "identity") +
geom_bar(aes(y = value2, fill = "Value2"), stat = "identity") +
scale_fill_manual(values = c("Value1" = "blue", "Value2" = "red")) +
theme_minimal()
# Retrieve the plot
plot <- ggplot_gtable(ggplot_build(plot_gg))
# Embed the SVG image outline as the background
print(xmlSVG(svg_image), vp = viewport(width = as.numeric(svg_dims["width"]),
height = as.numeric(svg_dims["height"]),
xmax = 1, ymax = 1,
just = c("left", "bottom")))
# Print the data visualization on top of the SVG image
grid.draw(plot)
# End SVG plot
dev.off()